Problem 1

1… 10

11

11.a A movie that is kept should appear in the dataset at least 18 times: it has one record for each weekend day (Friday, Saturday, and Sunday), from its opening weekend through at least 6 weekends. The movies that were dropped were not in theaters for more than 6 weekends.

11.b

# Retain only the rows whose `dropped` flag is not 1
films_used <- filter(films, dropped != 1)

11.c

# Value found in the "date" column for 12 Rounds (a day count from some origin)
days_before <- 17984

# Calendar date that 12 Rounds came out
round_12_date <- as.Date("2009-03-27")

# Step back that many days from the release date to recover the origin
# (day zero) that the numeric date columns are counted from
# NOTE(review): this yields 1959-12-31, one day before the usual Stata origin
# of 1960-01-01 -- confirm that 17984 really is the row for 2009-03-27
reference_date <- round_12_date - days_before

# Show the recovered origin
print(reference_date)
## [1] "1959-12-31"

11.d

# Turn the numeric `date` column into a calendar date by offsetting it from
# reference_date, then move the key columns to the front so that movie_date
# sits in the 4th position
films_used_d <- films_used |>
  mutate(movie_date = as.Date(reference_date + date)) |>
  relocate(title, production_budget, release_yr, movie_date, sat_date)

# Show each record's title next to its converted date
films_used_d[, c("title", "movie_date")]

11.e

# Convert sat_date to a calendar date (the Saturday of each record's weekend),
# then flag which day of that weekend the record falls on
films_used_date <- films_used_d |>
  mutate(
    sat_day = as.Date(reference_date + sat_date),
    # the record is the Saturday itself
    sat_dummy = ifelse(movie_date == sat_day, 1, 0),
    # Friday is the day before that Saturday
    fri_dummy = ifelse(movie_date == sat_day - 1, 1, 0),
    # Sunday is the day after that Saturday
    sun_dummy = ifelse(movie_date == sat_day + 1, 1, 0)
  ) |>
  # Cosmetic only: bring the date columns and the new dummies to the front
  select(
    title, production_budget, release_yr, movie_date,
    sat_day, sat_dummy, fri_dummy, sun_dummy, everything()
  )

# Inspect the dates alongside the three day-of-weekend dummies
films_used_date[, c(
  "title", "movie_date", "sat_day",
  "fri_dummy", "sat_dummy", "sun_dummy"
)]

11.f

# Number each film's weekends 1, 2, 3, ... in chronological order, then expand
# that index into one dummy column per week with fastDummies
films_used_date <- films_used_date |>
  arrange(title, sat_day) |>
  group_by(title) |>
  # Within a title the distinct Saturdays become sorted factor levels, so the
  # integer codes run from 1 (earliest weekend) upwards
  mutate(week = as.integer(factor(sat_day))) |>
  # Remove the per-title grouping so it does not leak into later steps
  # (whole-table operations such as adding model predictions need an
  # ungrouped table)
  ungroup()

# dummy_cols() adds one indicator column per distinct week value
# (week_1, week_2, ...)
films_used_date <- dummy_cols(films_used_date, select_columns = "week")
films_used_date[, c("title", "movie_date", "week_1", "week_2")]

11.g

# Expand release_yr into one indicator column per year (release_yr_<year>)
# with fastDummies
film <- films_used_date |>
  dummy_cols(select_columns = "release_yr")

# Check the year dummies against the original release year
film[, c("title", "release_yr", "release_yr_2009", "release_yr_2010")]

11.h

# Plot mean tickets by week in release, with one line per weekend day
film |>
  # Collapse the three day dummies into a single label; rows matching none of
  # them get NA because case_when() has no default branch
  mutate(weekend = case_when(
    sat_dummy == 1 ~ "Saturday",
    fri_dummy == 1 ~ "Friday",
    sun_dummy == 1 ~ "Sunday"
  )) |>
  group_by(week, weekend) |>
  # .groups = "drop" returns an ungrouped summary, which also avoids the
  # "summarise() has grouped output by 'week'" message
  summarize(mean = mean(tickets), .groups = "drop") |>
  ggplot(aes(x = week, y = mean, color = as.factor(weekend))) +
  geom_point() +
  geom_line() +
  scale_color_manual(values = c(
    "Saturday" = "#4682B4",
    "Friday" = "red",
    "Sunday" = "#8B008B"
  )) +
  labs(
    color = "Weekend",
    y = "Tickets",
    x = "Week"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 6)) + # x-axis ticks
  scale_y_continuous(breaks = scales::pretty_breaks(n = 6)) + # y-axis ticks
  theme_bw()

## 12 NOT NEEDED

13

# Build each group of regressors by picking columns of `film` whose names
# contain a given pattern and joining them with " + "

# columns with "hh" in the name (presumably the holiday indicators -- verify)
holiday <- str_subset(names(film), "hh")
holiday_dummy <- str_c(holiday, collapse = " + ")

# day-of-weekend dummies (fri_dummy, sat_dummy, sun_dummy)
weekend_dummy <- names(film) |>
  str_subset("dummy") |>
  str_c(collapse = " + ")

# week-in-release dummies (week_1, week_2, ...)
week_dummy <- names(film) |>
  str_subset("week_") |>
  str_c(collapse = " + ")

# release-year dummies (release_yr_<year>)
year_dummy <- names(film) |>
  str_subset("release_yr_") |>
  str_c(collapse = " + ")

# Full model formula, kept as text
mod1 <- glue("tickets ~ {weekend_dummy} + {week_dummy} + {year_dummy} + {holiday_dummy}")

# Fit the linear model
# NOTE(review): every level of the week and year dummy sets is included next
# to the intercept, so the fit is likely rank-deficient (some coefficients
# reported as NA) -- confirm this is acceptable
reg_mod1 <- lm(as.formula(mod1), data = film)

# Store the fitted values and the gap between actual and predicted tickets
film <- film |>
  mutate(
    pred_tickets = predict(reg_mod1, film),
    abnormal_viewership = tickets - pred_tickets
  )

film[, c("tickets", "pred_tickets", "abnormal_viewership", "sat_day")]

14

weather <- read_dta("data/weather_collapsed_all.dta")

# Keep the untouched column names, then prefix every weather column with
# "www_" so the weather variables can be picked out by name after the join
original_cols <- colnames(weather)
colnames(weather) <- paste0("www_", original_cols)

weather

# Attach the weather columns to each film record by its Saturday date.
# left_join() keeps every row of `film`; a record whose Saturday has no match
# in `weather` is kept with NA in the www_ columns rather than being removed.
# NOTE(review): sat_day is a Date derived from reference_date -- confirm that
# www_sat_date has the same type and refers to the same calendar day
weather_film <- left_join(
  film,
  weather,
  by = c("sat_day" = "www_sat_date")
)

15

#www_columns <- str_subset(colnames(weather_film), "www_")
#
#df <- weather_film 
#regressors <-  glue("~ {weekend_dummy} + {week_dummy} + {year_dummy} + #{holiday_dummy}")
#
#for (columns in www_columns) {
#  model <- paste(columns, regressors)
#  #Below has to be the same! if not it doesn't store the updates...
#  df <- df |> 
#    mutate(!!columns := residuals(lm(as.formula(model), data = df))) 
#}
#
#
#df